## Study-time and GPA regressions, with cluster-adjusted standard errors

## Build the two estimation samples. Each keeps only the columns its model
## needs (plus `pid`) and then drops every row with a missing value, so the
## fitted models and the `pid` vectors taken from these frames have the same
## rows.
no_missing_both_sem_df_study <- both_sem_df[
  ,
  c(
    "pid", "male", "black", "hsgpa", "combact", "studyhs", "estudy",
    "sem_friends_study", "sem_study"
  )
]
no_missing_both_sem_df_study <- no_missing_both_sem_df_study[
  complete.cases(no_missing_both_sem_df_study),
]

no_missing_both_sem_df_gpa <- both_sem_df[
  ,
  c(
    "pid", "male", "black", "hsgpa", "combact", "studyhs", "estudy",
    "sem_study", "gpa"
  )
]
no_missing_both_sem_df_gpa <- no_missing_both_sem_df_gpa[
  complete.cases(no_missing_both_sem_df_gpa),
]

## Pooled OLS fits; the clustered standard errors are computed afterwards
## from these model objects.
clustered_study_reg <- lm(
  sem_study ~ male + black + hsgpa + combact + studyhs + estudy +
    sem_friends_study,
  data = no_missing_both_sem_df_study
)
clustered_gpa_reg <- lm(
  gpa ~ male + black + hsgpa + combact + studyhs + estudy + sem_study,
  data = no_missing_both_sem_df_gpa
)

## Coefficient tests for a fitted model using cluster-robust standard errors.
##
## Arguments:
##   dat     - not used by the computation; kept so existing calls still work.
##   fm      - fitted model (e.g. from lm()) that sandwich::estfun(),
##             sandwich::sandwich() and lmtest::coeftest() accept and that
##             carries a `$rank` element.
##   cluster - vector of cluster ids, one per row of the model's score
##             matrix and in the same row order; must not contain NA.
##
## Returns the lmtest::coeftest() result for `fm`, evaluated with the
## clustered covariance matrix scaled by the finite-sample correction
## (M / (M - 1)) * ((N - 1) / (N - K)).
cl <- function(dat, fm, cluster) {
  ## An NA id cannot be assigned to a cluster, yet it would still be counted
  ## in M and N below, so refuse it instead of returning inconsistent output.
  if (anyNA(cluster)) {
    stop("`cluster` contains NA values; drop those rows before fitting",
         call. = FALSE)
  }

  M <- length(unique(cluster))
  ## With a single cluster the correction M / (M - 1) is infinite.
  if (M < 2L) {
    stop("need at least two clusters to compute clustered standard errors",
         call. = FALSE)
  }

  ## library() stops with an error when a package is missing, whereas
  ## require() only returns FALSE. The packages are still attached, exactly
  ## as the earlier require() calls did, in case later code relies on that.
  library(sandwich, quietly = TRUE)
  library(lmtest, quietly = TRUE)

  scores <- sandwich::estfun(fm)
  N <- length(cluster)
  if (NROW(scores) != N) {
    stop("`cluster` has ", N, " elements but the model has ", NROW(scores),
         " score rows", call. = FALSE)
  }

  K <- fm$rank
  dfc <- (M / (M - 1)) * ((N - 1) / (N - K))

  ## Sum the scores within each cluster (one row per cluster). rowsum() only
  ## creates rows for ids that actually occur, so unused factor levels cannot
  ## introduce NA rows.
  uj <- rowsum(scores, cluster)
  vcovCL <- dfc * sandwich::sandwich(fm, meat = crossprod(uj) / N)
  lmtest::coeftest(fm, vcovCL)
}

## Coefficient tables for both models, with `pid` from each estimation
## sample passed as the clustering variable.
clust_study_reg_results <- cl(
  dat = no_missing_both_sem_df_study,
  fm = clustered_study_reg,
  cluster = no_missing_both_sem_df_study[["pid"]]
)
clust_gpa_reg_results <- cl(
  dat = no_missing_both_sem_df_gpa,
  fm = clustered_gpa_reg,
  cluster = no_missing_both_sem_df_gpa[["pid"]]
)

## TABLE 2
## Writes the LaTeX table to ./output/ with a row noting the clustering level.
stargazer(
  clust_study_reg_results,
  clust_gpa_reg_results,
  title = "Study and GPA regressions, pooled over both semesters and clustered",
  out = "./output/sem_study_gpa_reg_clustered_student.tex",
  label = "tab:sem_study_gpa_reg_clustered_student",
  add.lines = list(c("Clustered SE", "student", "student"))
)